#delimit;
clear all;
set more off;

capture log using "06_Merge_Grantee_Financial_and_Governance_Data_with_DAFs.log", replace;
/*******************************************************************************************/;
** INPUT: NBER/AWS data on financial, governance, efficiency of grantees;
** OUTPUT: Combined dataset at DAF-year level with Percent grant $ given to different types of grantees ;
** LAST MODIFIED: JAG, 9/4/2021;
** RUN TIME: <5 minutes using Stata 15 on computer with 2 x 2.60GHz processors and 64GB RAM;
/*******************************************************************************************/;


use "Grants detail with inequality.dta", clear;
rename grantee_ein gr_ein;
merge m:1 gr_ein year using "NBER Grantee for merge.dta";
drop if _m==2;
drop _m;
merge m:1 ein year using "Main with additional details.dta";
** Essentially we want to keep both those that made grants and those that do not;
keep if _m!=1;
drop _m;
codebook(gr_ein);
sum dom_grant_amt, d;
** Note: there are 500 grants that are listed as negative, that's impossible;
replace dom_grant_amt=abs(dom_grant_amt);

* Given that different DAFs can give to the same grantee, we only want one observation per grantee to determine if that grantee is high inequality, etc...;
foreach v in lag_gr_mgmt_eff lag_gr_adm_eff lag_gr_lage lag_gr_ind_f lag_gr_f2exp 
lag_gr_f2contr lag_gr_pct_fevents lag_gr_mgn_fevent lag_gr_mjr75_frev lag_gr_mjr90_frev 
lag_gr_tot_ecomp lag_gr_highest_ecomp lag_gr_100k_comp lag_gr_100k_contr
lag_gr_mths_luna lag_gr_mths_liquid lag_gr_gift_dep lag_gr_comm_np lag_gr_prof_a 
lag_gr_prof_alt lag_gr_prof_ind lag_gr_uses_auditor lag_gr_volat lag_bgov_tot 
lag_bgov_max lag_bgov1 lag_bgov2 lag_bgov3 lag_bgov4 lag_bgov5 lag_bgov6 lag_gr_tot_asset lag_gr_unr_asset {;
	bys gr_ein: gen `v'_one_obs = `v' if _n == 1;
	egen med_`v' = median(`v'_one_obs);
	gen byte high_`v' = (`v' > med_`v') if !mi(`v');
	drop med_`v' `v'_one_obs;
};

** Next goal is to aggregate to DAF-year level;
bys ein year: egen grants_tot = total(dom_grant_amt);
** Inequality, financial, and governance metrics that are continuous;
foreach var in ineq ineq2 lag_gr_mths_luna lag_gr_mths_liquid lag_gr_volat lag_gr_prof_a lag_gr_prof_alt 
lag_gr_lage lag_bgov_tot lag_gr_adm_eff lag_gr_mgmt_eff lag_gr_tot_ecomp lag_gr_highest_ecomp 
lag_gr_pct_fevents lag_gr_f2exp lag_gr_f2contr lag_gr_mgn_fevent lag_gr_pct_fevent 
lag_bgov1 lag_bgov2 lag_bgov3 lag_bgov4 lag_bgov5 lag_bgov6 lag_gr_tot_asset lag_gr_unr_asset {;
	gen dga_`var' = high_`var' * dom_grant_amt;
};

** For financial and governance metrics that are already indicator variables;
foreach var in lag_gr_comm_np lag_gr_gift_dep lag_gr_prof_ind lag_gr_mjr75_frev lag_gr_mjr90_frev lag_gr_ind_f 
lag_bgov_max lag_gr_uses_auditor lag_gr_100k_comp lag_gr_100k_contr {;
	gen dga_`var' = `var' * dom_grant_amt;
};

rename dom_grant_amt dga;
gen dga_count=1 if dga!=0 & dga!=.;
gen dga_avg=dga;
gen sd_dga=dga;
** Will want to know both the avg per year and the average;
gen dga_count_avg=dga_count;

** Want to add a few more cut-offs for contribution size;
foreach v in lag_contrib_rev lag_gr_tot_asset lag_gr_unr_asset {;	
	bys gr_ein: gen `v'_one_obs = `v' if _n == 1;
	sum `v'_one_obs, d;
	gen tp75_`v'=r(p75);
	gen tp90_`v'=r(p90);
	gen tp25_`v'=r(p25);
	gen byte p25_`v' = (`v' < tp25_`v') if !mi(`v');
	gen byte p75_`v' = (`v' > tp75_`v') if !mi(`v');
	gen byte p90_`v' = (`v' > tp90_`v') if !mi(`v');
	drop tp25_`v' tp75_`v' tp90_`v' `v'_one_obs;
	gen dga_p25_`v' = p25_`v' * dga;
	gen dga_p75_`v' = p75_`v' * dga;
	gen dga_p90_`v' = p90_`v' * dga;
};
gen lag_bgov14=lag_bgov1+lag_bgov2+lag_bgov3+lag_bgov4;
gen lag_bgov56=lag_bgov5+lag_bgov6;
foreach v in lag_bgov14 lag_bgov56 lag_contrib_rev {;
	bys gr_ein: gen `v'_one_obs = `v' if _n == 1;
	egen med_`v' = median(`v'_one_obs);
	gen byte high_`v' = (`v' > med_`v') if !mi(`v');
	drop med_`v' `v'_one_obs;
};
gen dga_lag_contrib_rev= high_lag_contrib_rev*dga;
gen dga_lag_bgov14=high_lag_bgov14*dga;
gen dga_lag_bgov56=high_lag_bgov56*dga;
save "to examine.dta", replace;


use "to examine.dta", clear;
collapse (sd) sd_dga (sum) dga* (mean) high_* p25_* p75* p90* lag_gr_prof_ind lag_gr_age lag_gr_tot_ecomp lag_gr_highest_ecomp 
lag_gr_100k_comp lag_gr_100k_contr lag_gr_comm_np lag_gr_mjr90_frev lag_gr_mjr75_frev, by(ein year);

merge m:1 ein using "DAF classification clean.dta";
keep if _m==3;
drop _m;

** Creating a value-weighted version;
foreach x in dga_ineq dga_ineq2 dga_lag_gr_prof_alt 
dga_lag_gr_mths_luna dga_lag_gr_mths_liquid dga_lag_gr_volat  
dga_lag_gr_gift_dep dga_lag_gr_ind_f dga_lag_gr_f2contr 
dga_lag_gr_mgn_fevent dga_lag_gr_adm_eff dga_lag_gr_mgmt_eff 
dga_lag_bgov_max dga_lag_bgov_tot dga_lag_bgov14 dga_lag_bgov56
dga_lag_gr_uses_auditor dga_lag_bgov5 
dga_lag_gr_highest_ecomp dga_lag_gr_100k_comp dga_lag_gr_100k_contr
dga_lag_gr_pct_fevents dga_lag_gr_f2exp dga_lag_gr_mjr90_frev dga_lag_gr_mjr75_frev
dga_lag_contrib_rev dga_p90_lag_contrib_rev dga_p75_lag_contrib_rev dga_p25_lag_contrib_rev
dga_lag_gr_prof_ind dga_lag_gr_comm_np dga_lag_gr_tot_asset dga_lag_gr_unr_asset{;
	gen vw_`x'=`x'/dga;
	format `x' %2.1fc;
};

save "all grantee stats by daf.dta", replace;

** Merge in additional DAF characteristics so as to potentially explain variation;
use "all grantee stats by daf.dta", clear;
merge 1:1 ein year using "Main with additional details.dta";
gen has_grants=(_m==3);
drop _m;
** NOTE: State is a key variable we want to control but a few legal_dom appear to be mistyped so adjusting;
gen l=length(legal_dom);
list ein if l>2;
replace legal_dom="FL" if ein=="26-0724604";
replace legal_dom="MI" if ein=="38-2530980";
replace legal_dom="CA" if ein=="41-2269686";
replace legal_dom="CA" if ein=="47-2086507";
replace legal_dom="TN" if ein=="62-1341655";
replace legal_dom="TN" if ein=="62-1493663";
replace legal_dom="TX" if ein=="75-0253360";
replace legal_dom=lower(trim(legal_dom));
rename legal_dom daf_state;
save "DAF-Grants combined for regs at DAF-year level.dta", replace;

log close;
